* Figure 6: Probability Scatter Plot - Digital Calls
use "AllDerivatives", clear
* Make sure that all outcomes occur within a bin:
* the first bin is opened downward and the bin whose lower bound equals rightbin is opened upward.
replace binlb1=-99999
qui for num 1/18: replace binubX=99999 if binlbX==rightbin
* win1-win18 flag the bin that contains the released value.
qui for num 1/18: gen winX=(binlbX<=releasedval & binubX>releasedval)
* cumwin1-cumwin18 flag a released value below the upper bound of the bin; left missing where that bound is missing.
qui for num 1/18: gen cumwinX=releasedval<binubX if binubX~=.
* cumprob1-cumprob18 hold the running sum of the bin probabilities; they stay missing from the first missing bin probability onward.
gen cumprob1=probbin1
qui for X in num 1/17 \ Y in num 2/18: gen cumprobY=cumprobX+probbinY if probbinY~=.
keep statistic dtrelease win* probbin* cumwin* cumprob*
* Long layout: one row per (statistic, dtrelease) pair and bin.
reshape long win probbin cumwin cumprob, i(statistic dtrelease) j(bin)
* Cap cumulated probabilities at 1. Missing values compare as larger than any
* number in Stata, so they are excluded explicitly; otherwise rows for unused
* bins would be recoded to 1 and enter the group averages computed below.
replace cumprob=1 if cumprob>1 & cumprob<.

* Group rows on a .05 grid of cumulated probability and compute, per grid point,
* the mean of cumwin, the number of non-missing cumwin and the binomial standard error.
gen cumprice=round(cumprob,.05)
egen cumwinper=mean(cumwin), by(cumprice)
egen nwinper=count(cumwin), by(cumprice)
gen sewinper=sqrt(cumwinper*(1-cumwinper)/nwinper)
egen cumpriceav=mean(cumprob), by(cumprice)
* tag marks one row per grid point so that each point is drawn once.
egen tag=tag(cumprice)
* Switch both axes to complements (1 minus the cumulated quantity); presumably this
* converts the below-the-bound measures into the call-side measures named in the titles.
replace cumpriceav=1-cumpriceav
replace cumwinper=1-cumwinper
* Band of 1.96 standard errors either side of the point estimate.
gen cumwinperng=cumwinper-1.96*sewinper
gen cumwinperpl=cumwinper+1.96*sewinper
#delimit ;
twoway 
	(rarea cumwinperng cumwinperpl cumpriceav if tag==1, sort blcolor(gs14) bfcolor(gs14)) 
	(line cumpriceav cumpriceav if tag==1, sort clwidth(thin)) 
	(scatter cumwinper cumpriceav if tag==1)
,
	ytitle(Empirical Strike Rate) 
	ylabel( 0 "0%" .2 "20%" .4 "40%" .6 "60%" .8 "80%" 1 "100%", angle(horizontal)) 
	xtitle("Call Price" "(Price of an option paying $1 if economic statistic > strike price)")
	xlabel( 0 "$0.00" .2 "$0.20" .4 "$0.40" .6 "$0.60" 0.8 "$0.80" 1 "$1.00") 
	title(Auction Prices and Probabilities: Digital Calls) 
	subtitle(Aggregating data across all auctions into 20 call price bins) 
	legend(ring(0) pos(11) rows(3) region(fcolor(none) lcolor(none)) order(3 "Point Estimates: Proportion of options that strike" 1 "95% Confidence Interval (Binomial Dist.)" 2 "45-degree line: Unbiased"))
	xsize(10) ysize(7.5)	
;
#delimit cr
pause

******** Probability Integral Transforms
use "AllDerivatives", clear
* z accumulates, bin by bin, the full probability of every bin whose upper bound
* lies below the released value plus a linearly interpolated share of the bin
* that contains it.
generate z=0
forvalues b=1/18 {
	quietly replace z=z+probbin`b' if releasedvalue>binub`b'
	quietly replace z=z+probbin`b'*(releasedvalue-binlb`b')/(binub`b'-binlb`b') if releasedvalue<=binub`b' & releasedvalue>binlb`b'
}
label variable z "Realized quantile"

****** Figure 7: Histogram of Realized Quantiles
egen n=count(z)
generate cilower=.
generate ciupper=.
* cilower ends at the largest k in 0..10 with Binomial(n,k,.1)>.975 and
* ciupper at the smallest k in 10..50 with Binomial(n,k,.1)<.025
* (success probability .1, presumably one of the 10 histogram bins).
forvalues k=0/10 {
	quietly replace cilower=`k' if Binomial(n,`k',.1)>.975
}
forvalues k=50(-1)10 {
	quietly replace ciupper=`k' if Binomial(n,`k',.1)<.025
}
twoway ///
	(histogram z, bin(10) frequency start(0)) ///
	(line cilower z, sort clcolor(cranberry) clpat(longdash)) ///
	(line ciupper z, sort clcolor(cranberry) clpat(longdash)) ///
	, ///
	ytitle(Number of Observations) ylabel(0(5)25, angle(horizontal)) ///
	xtitle("Realized Quantiles (or Probability Integral Transform): z=P(y)" "Price of the cheapest digital put that paid $1") ///
	title(Histogram: Realized Quantiles) ///
	caption("Dashed lines represent critical values of test that data ~Uniform(0,1)", size(medium) position(12) ring(0)) ///
	legend(off) ///
	xsize(10) ysize(7.5)
pause

****** Figure 8: CDF of Realized Quantiles
* Kolmogorov-Smirnov test of z against the reference CDF F(z)=z.
ksmirnov z=z
* Empirical CDF: position in the z ordering divided by the number of rows.
sort z
generate y=_n
egen county=count(y)
replace y=y/county
* Figure out Kolmogorov-Smirnov bounds
* A uniform sample is redrawn until its test p-value lies within .0005 of .05;
* the D statistic of that draw is kept as the critical distance.
generate pval=1
generate random=.
generate dcritical=.
while abs(pval-.05)>.0005 {
	quietly replace random=uniform()
	quietly ksmirnov random=random
	quietly replace pval=r(p_cor)
	quietly replace dcritical=r(D)
}
drop pval random
label variable dcritical "Critical Values: Kolmogorov-Smirnov test at 95%"
generate upper=z+dcritical
generate lower=z-dcritical
twoway ///
	(line y z, clwidth(medthick)) ///
	(line z z, sort clcolor(cranberry) clpat(solid)) ///
	(line upper z if upper<1, sort clcolor(cranberry) clpat(longdash)) ///
	(line lower z if lower>0, sort clcolor(cranberry) clpat(longdash)) ///
	, ///
	ytitle(CDF of Realized Quantile: Realized proportion F(z)) ///
	ylabel(, angle(horizontal)) ///
	xtitle("Realized Quantile: z=Price(Outcome)") ///
	title(CDF: Realized Quantiles) ///
	legend(ring(0) pos(11) rows(3) region(fcolor(none) lcolor(none)) order(1 "Cumulative Distribution of Realized Quantiles" 2 "CDF of Uniform Distribution" 3 "95% critical values:" "Kolmogorov-Smirnov test of Uniformity")) ///
	xsize(10) ysize(7.5)
pause

* Figure 9: CDF of Realized Quantiles by Data Release
sort stat z
* Empirical CDF within each statistic: rank of z divided by the group count.
egen y2=rank(z), by(stat)
egen county2=count(y2), by(stat)
replace y2=y2/county2
* Figure out Kolmogorov-Smirnov bounds
* Simulation search, one critical distance per statistic: uniform samples are
* redrawn until the test p-value of every listed statistic is within .0005 of .05,
* and the D statistic of the accepted draw is stored for that statistic.
* The draws are random and no seed is set in the visible code, so the number of
* iterations and the last digits of the bounds vary between runs.
gen dcritical2=.
gen pval=1
gen error=1
gen diff=.
gen random=.
qui while error>.0005 {
	replace random=uniform()
	* Only statistics whose distance from .05 equals the current maximum are
	* refreshed. On the first pass diff is missing, so nothing is stored yet.
	for X in any iclm nfpay rsxaut napm: ksmirnov random=random if statistic=="X" \ /*
		*/ replace dcritical2=r(D) if diff==error & statistic=="X" \ /*
		*/ replace pval=r(p_cor) if diff==error & statistic=="X"
	replace diff=abs(pval-.05)
	drop error
	egen error=max(diff)
}
drop pval error diff
* The search above targets a p-value of .05, i.e. the 95% level quoted in the graph subtitle.
la var dcritical2 "Critical Values: Kolmogorov-Smirnov test at 95%"
gen upper2=z+dcritical2
gen lower2=z-dcritical2
#delimit ;
twoway 
	(line y2 z, clwidth(medthick)) 
	(line z z, sort clcolor(cranberry) clpat(solid)) 
	(line upper2 z, sort clcolor(cranberry) clpat(longdash)) 
	(line lower2 z, sort clcolor(cranberry) clpat(longdash))
,
	by(stat, 
		title(CDF: Realized Quantiles) 
		subtitle("Dashed lines show 95% Kolmogorov-Smirnov critical values under null z~U(0,1)") 
		legend(off)) 
	ytitle(CDF of Realized Quantile: Realized proportion F(z)) 
	ylabel(, angle(horizontal)) 
	xtitle("Realized Quantile: z=Price(Outcome)") 
	caption(, size(medium) position(12) ring(0))
	xsize(10) ysize(7.5)
;
#delimit cr
pause

* Figure 10
* Realized quantile z plotted against dtrelease, one panel per statistic on a common y scale.
twoway ///
	(line z dtrelease, sort) ///
	, ///
	by(stat, ///
		noyrescale ///
		title("Time Series: Realized Quantiles") ///
		legend(off)) ///
	ytitle("Realized Quantile z=P(y):" "Market's CDF of Realized Outcome") ///
	ylabel(, angle(horizontal)) ///
	xtitle("Date (Data Release)") ///
	xlabel(, format(%dn/Y)) ///
	caption(, size(medium) position(12) ring(0)) ///
	legend(off) ///
	xsize(10) ysize(7.5)
pause

* Table 8: Berkowitz Analysis
* Normal scores of the realized quantiles; the endpoints z==0 and z==1 are pinned at -3 and 3.
generate zinv=invnorm(z)
replace zinv=3 if z==1
replace zinv=-3 if z==0
* Declare the panel structure (statno by time) used by the lag operator further down.
sort statno time
tsset statno time

* Clear every program currently in memory before (re)defining the evaluator.
program drop _all
* berkowitz: ml method-lf evaluator for a Gaussian first-order autoregression of
* the dependent variable, with parameters mu (intercept), sigmasq (innovation
* variance) and rho (autoregressive coefficient).
*   - rows without a lagged value use the unconditional density, with mean
*     mu/(1-rho) and variance sigmasq/(1-rho^2);
*   - all other rows use the density conditional on the lagged value.
* NOTE(review): the conditional term carries no -0.5*ln(2*_pi) constant. The
* restricted likelihood computed later in this file omits it in the same way,
* so likelihood-ratio statistics are unaffected, but reported log-likelihood
* levels are shifted by that constant.
program define berkowitz
	version 6
	args lnf mu sigmasq rho
	tempvar ylag
	quietly generate `ylag'=l.$ML_y1
	* Sub-expressions kept as macro text so the expanded formulas match term by term.
	local umean "`mu'/(1-`rho')"
	local uvar "`sigmasq'/(1-`rho'*`rho')"
	local resid "$ML_y1-`mu'-`rho'*`ylag'"
	quietly replace `lnf'=-0.5*(ln(2*_pi)+ln(`uvar')+((($ML_y1-(`umean'))^2)/(`uvar'))) if `ylag'==.
	quietly replace `lnf'=-0.5*ln(`sigmasq')-((`resid')^2)/(2*`sigmasq') if `ylag'~=.
end

* Unrestricted model on the pooled sample: mean, sigmasq and rho estimated by ML.
sort statno time
ml model lf berkowitz (mean: zinv=) /sigmasq /rho
ml maximize
test [sigmasq]_cons==1
generate ll1=e(ll)

* Restricted model (mu=0, sigmasq=1, rho=0): the same log-likelihood
* expressions evaluated row by row and summed.
generate double mu=0
generate double sigmasq=1
generate double rho=0
generate double zinvlag=l.zinv
generate double lnf=-0.5*(ln(2*_pi)+ln(sigmasq/(1-rho*rho))+(( (zinv-(mu/(1-rho)))^2 )/( sigmasq/(1-rho*rho) )) ) if zinvlag==.
replace lnf= -0.5*ln(sigmasq)  -((zinv-mu-rho*zinvlag)^2)/(2*sigmasq) if zinvlag~=.
egen double ll0=sum(lnf)
* Likelihood-ratio statistic and its chi-squared tail probability.
generate lr=-2*(ll0-ll1)
generate pval=chi2tail(3,lr)
* This yields the pvalue on the restriction imposed; using 3 degrees of freedom
summarize ll0 ll1 lr pval
drop ll1 ll0 lr pval

* Same estimation and likelihood-ratio test, repeated for each statistic separately.
for X in any nfpay napm rsxaut iclm: /*
	*/ ml model lf berkowitz (mean: zinv=) /sigmasq /rho if statistic=="X" \ /*
	*/ ml max \ test [sigmasq]_cons==1 \ gen ll1=e(ll) \ egen ll0=sum(lnf) if statistic=="X" \ gen lr=-2*(ll0-ll1) \ gen pval=chi2tail(3,lr) \ /*
	*/ summ ll0 ll1 lr pval \ drop ll0 ll1 lr pval
